{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import sys\n",
    "sys.path.append(\"../\")\n",
    "import pickle\n",
    "import numpy as np\n",
    "from tqdm import tqdm_notebook\n",
    "from prefetch_generator import BackgroundGenerator\n",
    "from matplotlib import pylab as plt\n",
    "from IPython.display import clear_output\n",
    "import os\n",
    "from joblib import Parallel, delayed\n",
    "from tqdm import tqdm\n",
    "import nltk\n",
    "from glob import glob\n",
    "from joblib import Parallel, delayed\n",
    "from collections import Counter\n",
    "from layers import *\n",
    "from utils import *\n",
    "from models import *\n",
    "import json\n",
    "import tensorflow as tf\n",
    "tfe = tf.contrib.eager \n",
    "config = tf.ConfigProto(\n",
    "    gpu_options=tf.GPUOptions(\n",
    "        visible_device_list=\"0\"))\n",
    "config.gpu_options.allow_growth = True\n",
    "session = tf.Session(config=config)\n",
    "tf.enable_eager_execution(config=config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "checkpoint_dir = \"../models/path_to_dir\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "trn_data = read_pickle(\"../dataset/nl/train.pkl\")\n",
    "vld_data = read_pickle(\"../dataset/nl/valid.pkl\")\n",
    "tst_data = read_pickle(\"../dataset/nl/test.pkl\")\n",
    "code_i2w = read_pickle(\"../dataset/code_i2w.pkl\")\n",
    "code_w2i = read_pickle(\"../dataset/code_w2i.pkl\")\n",
    "nl_i2w = read_pickle(\"../dataset/nl_i2w.pkl\")\n",
    "nl_w2i = read_pickle(\"../dataset/nl_w2i.pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "trn_x, trn_y_raw = zip(*trn_data.items())\n",
    "vld_x, vld_y_raw = zip(*vld_data.items())\n",
    "tst_x, tst_y_raw = zip(*tst_data.items())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "trn_y = [[nl_w2i[t] if t in nl_w2i.keys() else nl_w2i[\"<UNK>\"] for t in l] for l in trn_y_raw]\n",
    "vld_y = [[nl_w2i[t] if t in nl_w2i.keys() else nl_w2i[\"<UNK>\"] for t in l] for l in vld_y_raw]\n",
    "tst_y = [[nl_w2i[t] if t in nl_w2i.keys() else nl_w2i[\"<UNK>\"] for t in l] for l in tst_y_raw]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# model defining\n",
    "class Model(BaseModel):\n",
    "    def __init__(self, dim_E, dim_F, dim_rep, in_vocab, out_vocab, layer=1, dropout=0.5, lr=1e-4):\n",
    "        super(Model, self).__init__(dim_E, dim_F, dim_rep, in_vocab, out_vocab, layer, dropout, lr)\n",
    "        self.E = TreeEmbeddingLayer(dim_E, in_vocab)\n",
    "        self.encoder = ChildSumLSTMLayer(dim_E, dim_rep)\n",
    "    \n",
    "    def encode(self, trees):\n",
    "        trees = self.E(trees)\n",
    "        trees = self.encoder(trees)\n",
    "        \n",
    "        hx = tf.stack([tree.h for tree in trees])\n",
    "        cx = tf.stack([tree.c for tree in trees])\n",
    "        ys = [tf.stack([node.h for node in traverse(tree)]) for tree in trees]\n",
    "        \n",
    "        return ys, [hx, cx]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# setting model\n",
    "model = Model(512, 512, 512, len(code_w2i), len(nl_w2i), dropout=0.5, lr=1e-4)\n",
    "epochs = 15\n",
    "batch_size = 64\n",
    "os.makedirs(checkpoint_dir, exist_ok=True)\n",
    "checkpoint_prefix = os.path.join(checkpoint_dir, \"ckpt\")\n",
    "root = tfe.Checkpoint(model=model)\n",
    "history = {\"loss\":[], \"loss_val\":[]}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Setting Data Generator\n",
    "trn_gen = Datagen_tree(trn_x, trn_y, batch_size, code_w2i, nl_i2w, train=True)\n",
    "vld_gen = Datagen_tree(vld_x, vld_y, batch_size, code_w2i, nl_i2w, train=False)\n",
    "tst_gen = Datagen_tree(tst_x, tst_y, batch_size, code_w2i, nl_i2w, train=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# training\n",
    "for epoch in range(epochs):\n",
    "    \n",
    "    # train\n",
    "    loss_tmp = []\n",
    "    t = tqdm(trn_gen(epoch))\n",
    "    for x, y, _, _ in t:\n",
    "        loss_tmp.append(model.train_on_batch(x, y))\n",
    "        t.set_description(\"epoch:{:03d}, loss = {}\".format(epoch + 1, np.mean(loss_tmp)))\n",
    "    history[\"loss\"].append(np.sum(loss_tmp) / len(t))\n",
    "    \n",
    "    loss_tmp = []\n",
    "    t = tqdm(vld_gen(epoch))\n",
    "    for x, y, _, _ in t:\n",
    "        loss_tmp.append(model.evaluate_on_batch(x, y))\n",
    "        t.set_description(\"epoch:{:03d}, loss_val = {}\".format(epoch + 1, np.mean(loss_tmp)))\n",
    "    history[\"loss_val\"].append(np.sum(loss_tmp) / len(t))\n",
    "    \n",
    "    # checkpoint\n",
    "    if history[\"loss_val\"][-1] == min(history[\"loss_val\"]):\n",
    "        checkpoint_prefix = os.path.join(checkpoint_dir, \"ckpt\")\n",
    "        root.save(file_prefix=checkpoint_prefix)\n",
    "    \n",
    "    # print\n",
    "    clear_output()\n",
    "    for key, val in history.items():\n",
    "        if \"loss\" in key:\n",
    "            plt.plot(val, label=key)\n",
    "    plt.legend()\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "root.restore(tf.train.latest_checkpoint(checkpoint_dir))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "preds = []\n",
    "trues = []\n",
    "for x, y, _, y_raw in tqdm(tst_gen(0)):\n",
    "    res = model.translate(x, nl_i2w, nl_w2i)\n",
    "    preds += res\n",
    "    trues += [s[1:-1] for s in y_raw]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bleus = Parallel(n_jobs=-1)(delayed(bleu4)(t, p) for t, p in tqdm(list(zip(trues, preds))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "history[\"bleus\"] = bleus\n",
    "history[\"preds\"] = preds\n",
    "history[\"trues\"] = trues\n",
    "history[\"numbers\"] = [int(x.split(\"/\")[-1]) for x in tst_x]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(os.path.join(checkpoint_dir, \"history.json\"), \"w\") as f:\n",
    "    json.dump(history, f)"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
